This notebook estimates the indicators based on the raw data and
perfomrs the main analyses and figures used in the manuscript of the
multicountry paper. The input is the “clean kobo output” that was first
cleaned by 1.2_cleaning.
Load required libraries:
library(tidyr)
library(dplyr)
library(utile.tools)
library(stringr)
library(ggplot2)
library(ggsankey)
library(alluvial)
library(viridis)
library(cowplot)
Load required functions. These custom fuctions are available at: https://github.com/AliciaMstt/GeneticIndicators
source("get_indicator1_data.R")
source("get_indicator2_data.R")
source("get_indicator3_data.R")
source("get_metadata.R")
source("transform_to_Ne.R")
source("estimate_indicator1.R")
Other custom functions:
### not in
'%!in%' <- function(x,y)!('%in%'(x,y))
#' Duplicates data to create additional facet. Thanks to https://stackoverflow.com/questions/18933575/easily-add-an-all-facet-to-facet-wrap-in-ggplot2
#' @param df a dataframe
#' @param col the name of facet column
#'
CreateAllFacet <- function(df, col){
df$facet <- df[[col]]
temp <- df
temp$facet <- "all"
merged <-rbind(temp, df)
# ensure the facet value is a factor
merged[[col]] <- as.factor(merged[[col]])
return(merged)
}
Custom colors:
## IUCN official colors
# Assuming order of levels is: "re", "cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown" (for regional, and w/o "re" for global). Make sure to change the levels to that order before plotting.
IUCNcolors<-c("brown2", "darkorange", "yellow", "green", "darkgreen", "darkgrey", "azure2", "bisque1")
IUCNcolors_regional<-c("darkorchid2", "brown2", "darkorange", "yellow", "green", "darkgreen", "darkgrey", "azure2", "bisque1")
## nice soft ramp for taxonomic groups
taxoncolors<-cividis(12) # same than using cividis(length(levels(as.factor(metadata$taxonomic_group))))
## Colors for simplified methods to define populations
# assuming the levels (see how this was created in the section "Simplify combinations of methods to define populations"): of running levels(as.factor(ind2_data$defined_populations_simplified))
# get a set of colors to highlight genetic and geographic with similar colors
simplifiedmethods_colors<-c("#b34656", #"adaptive_traits management_units"
"#b34656", # "management_units"
"#7f611b", # "eco_biogeo_proxies"
"#668cd1", # "genetic_clusters"
"#668cd1", # "genetic_clusters eco_biogeo_proxies"
"#45c097", # "genetic_clusters geographic_boundaries"
"#d4b43e", # "geographic_boundaries"
"#d4b43e", # "geographic_boundaries adaptive_traits"
"#d4b43e", # "geographic_boundaries eco_biogeo_proxies"
"#d4b43e", # "geographic_boundaries management_units"
"#be72c9", # "low_freq_combinations"
"#be72c9")# "other"
Get indicators and metadata data from clean kobo output
# Get data:
kobo_clean<-read.csv(file="kobo_output_clean.csv", header=TRUE)
# Extract indicator 2 data from kobo output, show most relevant columns
ind2_data<-get_indicator2_data(kobo_output=kobo_clean)
## [1] "the data already contained a taxon column, that was used instead of creating a new one"
head(ind2_data[,c(1:3, 9:10,13)])
# Extract indicator 3 data from kobo output, show most relevant columns
ind3_data<-get_indicator3_data(kobo_output=kobo_clean)
## [1] "the data already contained a taxon column, that was used instead of creating a new one"
head(ind3_data[,c(1:3, 9:11)])
# extract metadata, show most relevant columns
metadata<-get_metadata(kobo_output=kobo_clean)
## [1] "the data already contained a taxon column, that was used instead of creating a new one"
head(metadata[,c(1:3, 12, 25,26, 64)])
Some taxa were assessed twice or more times, for example to account
for uncertainty on how to divide populations. This information is stored
in variable multiassessment of the metadata (created by
get_metadata()). An example of taxa with multiple
assessments:
metadata %>%
filter(multiassessment=="multiassessment") %>%
select(taxonomic_group, taxon, country_assessment, multiassessment) %>%
arrange(taxon, country_assessment) %>%
head()
Extract indicator 1 data from kobo output, show most relevant columns
ind1_data<-get_indicator1_data(kobo_output=kobo_clean)
## [1] "the data already contained a taxon column, that was used instead of creating a new one"
head(ind1_data[,c(1:3, 12:14)])
Remember what the function to transform NcRange and NcPoint data into Ne does:
# check what the custom funciton does
transform_to_Ne
## function (ind1_data)
## {
## ind1_data = ind1_data
## ind1_data <- ind1_data %>% mutate(Nc_from_range = case_when(NcRange ==
## "more_5000_bymuch" ~ 5001, NcRange == "more_5000" ~ 5001,
## NcRange == "less_5000_bymuch" ~ 100, NcRange == "less_5000" ~
## 100, NcRange == "range_includes_5000" ~ 5001)) %>%
## mutate(Ne_from_Nc = case_when(!is.na(NcPoint) ~ NcPoint *
## 0.1, !is.na(Nc_from_range) ~ Nc_from_range * 0.1)) %>%
## mutate(Ne_combined = if_else(is.na(Ne), Ne_from_Nc, Ne))
## print(ind1_data)
## }
Use function to get Ne data from NcRange or NcPoint data, and their combination (Ne estimated from Ne if Ne is available, otherwise, from Nc)
ind1_data<-transform_to_Ne(ind1_data = ind1_data)
## # A tibble: 2,549 × 38
## country_assessme… taxonomic_group taxon scientific_auth… genus year_assesment
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 sweden mammal Alce… (Linnaeus, 1758) Alces 2023
## 2 sweden mammal Alce… (Linnaeus, 1758) Alces 2023
## 3 sweden mammal Alce… (Linnaeus, 1758) Alces 2023
## 4 sweden fish Silu… (Linnaeus, 1758) Silu… 2023
## 5 sweden fish Silu… (Linnaeus, 1758) Silu… 2023
## 6 sweden fish Silu… (Linnaeus, 1758) Silu… 2023
## 7 sweden fish Silu… (Linnaeus, 1758) Silu… 2023
## 8 sweden fish Silu… (Linnaeus, 1758) Silu… 2023
## 9 sweden fish Silu… (Linnaeus, 1758) Silu… 2023
## 10 sweden bird Dend… Bechstein 1803 Dend… 2022
## # … with 2,539 more rows, and 32 more variables: name_assessor <chr>,
## # email_assessor <chr>, kobo_tabular <chr>, time_populations <chr>,
## # X_validation_status <chr>, X_uuid <chr>, multiassessment <chr>,
## # population <chr>, Name <chr>, Origin <chr>, IntroductionYear <chr>,
## # Ne <dbl>, NeLower <dbl>, NeUpper <dbl>, NeYear <chr>, GeneticMarkers <chr>,
## # GeneticMarkersOther <chr>, MethodNe <chr>, SourceNe <chr>, NcType <chr>,
## # NcYear <chr>, NcMethod <chr>, NcRange <chr>, NcRangeDetails <chr>, …
Remember what the function to estimate indicator 1 does:
# check what the custom function does
estimate_indicator1
## function (ind1_data)
## {
## indicator1 <- ind1_data %>% group_by(X_uuid, ) %>% summarise(n_pops = n(),
## n_pops_Ne_data = sum(!is.na(Ne_combined)), n_pops_more_500 = sum(Ne_combined >
## 500, na.rm = TRUE), indicator1 = n_pops_more_500/n_pops_Ne_data) %>%
## left_join(metadata)
## print(indicator1)
## }
Now estimate indicator 1 :)
indicator1<-estimate_indicator1(ind1_data = ind1_data)
## Joining, by = "X_uuid"
## # A tibble: 547 × 69
## X_uuid n_pops n_pops_Ne_data n_pops_more_500 indicator1 country_assessm…
## <chr> <int> <int> <int> <dbl> <chr>
## 1 010d85cd-5… 2 1 1 1 united_states
## 2 016d59ae-9… 1 1 0 0 mexico
## 3 019bd95f-b… 1 1 0 0 sweden
## 4 01b10b29-9… 1 1 1 1 south_africa
## 5 0301e6b3-b… 3 3 3 1 france
## 6 037d6c8f-7… 4 2 2 1 united_states
## 7 03f03179-1… 1 1 1 1 south_africa
## 8 0586b61e-7… 12 12 0 0 belgium
## 9 065a53ba-0… 1 1 0 0 south_africa
## 10 06e6bb50-3… 1 1 0 0 belgium
## # … with 537 more rows, and 63 more variables: taxonomic_group <chr>,
## # taxon <chr>, scientific_authority <chr>, genus <chr>, year_assesment <chr>,
## # name_assessor <chr>, email_assessor <chr>, common_name <chr>,
## # kobo_tabular <chr>, X_validation_status <chr>, GBIF_taxonID <int>,
## # NCBI_taxonID <chr>, national_taxonID <chr>, source_national_taxonID <chr>,
## # other_populations <chr>, time_populations <chr>, defined_populations <chr>,
## # source_definition_populations <chr>, map_populations <chr>, …
Indicator 2 is the he proportion of populations within species which
are maintained. This can be estimated based on the
n_extant_populations and n_extint_populations,
as follows:
ind2_data$indicator2<- ind2_data$n_extant_populations / (ind2_data$n_extant_populations + ind2_data$n_extint_populations)
head(ind2_data$indicator2)
## [1] 1.0000000 0.5000000 0.2941176 1.0000000 0.3333333 1.0000000
Indicator 3 refers to the number (count) of taxa by country in which
genetic monitoring is occurring. This is stored in the variable
temp_gen_monitoring as a “yes/no” answer for each taxon, so
to estimate the indicator, we only need to count how many said “yes”,
keeping only one of the records when the taxon was multiassessed:
indicator3<-ind3_data %>%
# keep only one record if the taxon was assessed more than once within the country
select(country_assessment, taxon, temp_gen_monitoring) %>%
filter(!duplicated(.)) %>%
# count "yes" in tem_gen_monitoring by country
filter(temp_gen_monitoring=="yes") %>%
group_by(country_assessment) %>%
summarise(n_taxon_gen_monitoring= n())
It could be useful to have the estimated indicator and the metadata in a single large table.
indicators_full<-left_join(metadata, indicator1) %>%
left_join(ind2_data) %>%
left_join(ind3_data)
## Joining, by = c("country_assessment", "taxonomic_group", "taxon",
## "scientific_authority", "genus", "year_assesment", "name_assessor",
## "email_assessor", "common_name", "kobo_tabular", "X_validation_status",
## "X_uuid", "GBIF_taxonID", "NCBI_taxonID", "national_taxonID",
## "source_national_taxonID", "other_populations", "time_populations",
## "defined_populations", "source_definition_populations", "map_populations",
## "map_populations_URL", "habitat_decline_area", "source_populations",
## "popsize_data", "ne_pops_exists", "nc_pops_exists", "ratio_exists",
## "species_related", "ratio_species_related", "ratio_year",
## "source_popsize_ratios", "species_comments", "realm", "IUCN_habitat",
## "other_habitat", "national_endemic", "transboundary_type", "other_explain",
## "country_proportion", "species_range", "rarity", "occurrence_extent",
## "occurrence_area", "pop_fragmentation_level", "species_range_comments",
## "global_IUCN", "regional_redlist", "other_assessment_status",
## "other_assessment_name", "source_status_distribution", "fecundity",
## "semelparous_offpring", "reproductive_strategy", "reproductive_strategy_other",
## "adult_age_data", "other_reproductive_strategy", "longevity_max",
## "longevity_median", "longevity_maturity", "longevity_age",
## "life_history_based_on", "life_history_sp_basedon", "sources_life_history",
## "multiassessment")
## Joining, by = c("country_assessment", "taxonomic_group", "taxon",
## "scientific_authority", "genus", "year_assesment", "name_assessor",
## "email_assessor", "X_validation_status", "X_uuid", "other_populations",
## "time_populations", "defined_populations", "source_definition_populations",
## "map_populations", "map_populations_URL", "habitat_decline_area",
## "source_populations", "multiassessment")
## Joining, by = c("country_assessment", "taxonomic_group", "taxon",
## "scientific_authority", "genus", "year_assesment", "name_assessor",
## "email_assessor", "X_validation_status", "X_uuid", "multiassessment")
Save indicators data and metadata to csv files, useful for analyses outside R.
# save processed data
write.csv(ind1_data, "ind1_data.csv", row.names = FALSE, fileEncoding = "UTF-8")
write.csv(indicators_full, "indicators_full.csv", row.names = FALSE, fileEncoding = "UTF-8")
write.csv(ind2_data, "ind2_data.csv", row.names = FALSE, fileEncoding = "UTF-8")
write.csv(ind3_data, "ind3_data.csv", row.names = FALSE, fileEncoding = "UTF-8")
write.csv(metadata, "metadata.csv", row.names = FALSE, fileEncoding = "UTF-8")
To have nice levels in the plots we will change the way country names are written:
# make factor
metadata$country_assessment<-as.factor(metadata$country_assessment)
indicators_full$country_assessment<-as.factor(indicators_full$country_assessment)
ind2_data$country_assessment<-as.factor(ind2_data$country_assessment)
ind1_data$country_assessment<-as.factor(ind1_data$country_assessment)
indicator1$country_assessment<-as.factor(indicator1$country_assessment)
# original levels
levels(metadata$country_assessment)
## [1] "australia" "belgium" "colombia" "france"
## [5] "japan" "mexico" "south_africa" "sweden"
## [9] "united_states"
# change
levels(metadata$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US")
levels(ind1_data$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US")
levels(ind2_data$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US")
levels(indicator1$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US")
The methods used to define populations come from a check box question were one or more of the following categories can be selected: genetic_clusters, geographic_boundaries, eco_biogeo_proxies, adaptive_traits, management_units, other. As a consequence any combination of the former can be possible. Leading to the following frequency table:
table(ind2_data$defined_populations)
##
## adaptive_traits
## 5
## adaptive_traits management_units
## 20
## eco_biogeo_proxies
## 41
## eco_biogeo_proxies adaptive_traits
## 2
## eco_biogeo_proxies management_units
## 10
## eco_biogeo_proxies other
## 2
## genetic_clusters
## 107
## genetic_clusters adaptive_traits
## 7
## genetic_clusters eco_biogeo_proxies
## 20
## genetic_clusters eco_biogeo_proxies adaptive_traits
## 3
## genetic_clusters eco_biogeo_proxies adaptive_traits management_units
## 2
## genetic_clusters eco_biogeo_proxies management_units
## 1
## genetic_clusters geographic_boundaries
## 74
## genetic_clusters geographic_boundaries adaptive_traits
## 5
## genetic_clusters geographic_boundaries eco_biogeo_proxies
## 7
## genetic_clusters geographic_boundaries eco_biogeo_proxies adaptive_traits
## 1
## genetic_clusters geographic_boundaries eco_biogeo_proxies adaptive_traits management_units
## 1
## genetic_clusters geographic_boundaries eco_biogeo_proxies management_units
## 1
## genetic_clusters geographic_boundaries management_units
## 8
## genetic_clusters management_units
## 11
## genetic_clusters other
## 2
## geographic_boundaries
## 276
## geographic_boundaries adaptive_traits
## 32
## geographic_boundaries adaptive_traits management_units
## 12
## geographic_boundaries adaptive_traits management_units other
## 1
## geographic_boundaries eco_biogeo_proxies
## 75
## geographic_boundaries eco_biogeo_proxies adaptive_traits
## 3
## geographic_boundaries eco_biogeo_proxies management_units
## 3
## geographic_boundaries eco_biogeo_proxies other
## 2
## geographic_boundaries management_units
## 25
## geographic_boundaries other
## 12
## management_units
## 132
## management_units other
## 2
## other
## 19
It is hard to group the above methods, so we will keep the original groups with n >=19 in the above list, and tag the combinations that appear few times as as “low_freq_combinations”.
Which groups have n>=19?
x<-as.data.frame(table(ind2_data$defined_populations)[table(ind2_data$defined_populations) >= 19])
colnames(x)[1]<-"method"
x
We can add this new column to the metadata and indicator 2 data:
### for ind2_data
ind2_data<- ind2_data %>%
mutate(defined_populations_simplified = case_when(
# if the method is in the list of methods n>=19 then keep it
defined_populations %in% x$method ~ defined_populations,
TRUE ~ "low_freq_combinations"))
### for meta
metadata<- metadata %>%
mutate(defined_populations_simplified = case_when(
# if the method is in the list of methods n>=19 then keep it
defined_populations %in% x$method ~ defined_populations,
TRUE ~ "low_freq_combinations"))
Check n for simplified methods:
table(ind2_data$defined_populations_simplified)
##
## adaptive_traits management_units
## 20
## eco_biogeo_proxies
## 41
## genetic_clusters
## 107
## genetic_clusters eco_biogeo_proxies
## 20
## genetic_clusters geographic_boundaries
## 74
## geographic_boundaries
## 276
## geographic_boundaries adaptive_traits
## 32
## geographic_boundaries eco_biogeo_proxies
## 75
## geographic_boundaries management_units
## 25
## low_freq_combinations
## 103
## management_units
## 132
## other
## 19
Table of equivalences:
ind2_data %>%
select(defined_populations, defined_populations_simplified) %>%
filter(!duplicated(defined_populations))
Create nicer names for ploting
# original method names
levels(as.factor(ind2_data$defined_populations_simplified))
## [1] "adaptive_traits management_units"
## [2] "eco_biogeo_proxies"
## [3] "genetic_clusters"
## [4] "genetic_clusters eco_biogeo_proxies"
## [5] "genetic_clusters geographic_boundaries"
## [6] "geographic_boundaries"
## [7] "geographic_boundaries adaptive_traits"
## [8] "geographic_boundaries eco_biogeo_proxies"
## [9] "geographic_boundaries management_units"
## [10] "low_freq_combinations"
## [11] "management_units"
## [12] "other"
# nice methods names in original order
nice_names_or <- c("adaptive traits & management units",
"eco- biogeographic proxies",
"genetic clusters",
"genetic clusters & eco- biogeographic proxies",
"genetic clusters & geographic boundaries",
"geographic boundaries",
"geographic boundaries & adaptive traits",
"geographic boundaries & eco- biogeographic proxies",
"geographic boundaries & management units",
"low frequency combinations",
"management units",
"others")
# nice names in better order for plotting
nice_names <- c("adaptive traits & management units",
"management units",
"eco- biogeographic proxies",
"genetic clusters",
"genetic clusters & eco- biogeographic proxies",
"genetic clusters & geographic boundaries",
"geographic boundaries",
"geographic boundaries & adaptive traits",
"geographic boundaries & eco- biogeographic proxies",
"geographic boundaries & management units",
"low frequency combinations",
"others")
### add them
## ind2
ind2_data$defined_populations_nicenames<-as.factor(ind2_data$defined_populations_simplified)
# add new nice names
levels(ind2_data$defined_populations_nicenames)<-nice_names_or
# change to desired order
ind2_data$defined_populations_nicenames<-factor(ind2_data$defined_populations_nicenames,
levels=nice_names)
# metadata
metadata$defined_populations_nicenames<-as.factor(metadata$defined_populations_simplified)
levels(metadata$defined_populations_nicenames)<-nice_names_or
metadata$defined_populations_nicenames<-factor(metadata$defined_populations_nicenames,
levels=nice_names)
#check names match
select(metadata, defined_populations_nicenames, defined_populations_simplified)
Records by country, including taxa assessed more than once (see below for details on this)
ggplot(metadata, aes(x=country_assessment)) +
geom_bar(stat = "count") +
xlab("") +
ggtitle("Number of taxa assessed by country, including taxa assed more than once") +
theme_light()
To explore what kind of taxa countries assessed regardless of if they assessed them once or more, lets create a dataset keeping all single assessed taxa, plus only the first assessment for taxa assessed multiple times.
# object with single assessed taxa, plus only the first assessment for taxa assessed multiple times
metadata_firstmulti<-metadata[!duplicated(cbind(metadata$taxon, metadata$country_assessment)), ]
How many taxa were assessed (i.e. counting only once taxa that were assessed multiple times)?
# how many?
nrow(metadata_firstmulti)
## [1] 877
Plot taxa assessed excluding duplicates, i.e. the real number of taxa assessed:
ggplot(metadata_firstmulti, aes(x=country_assessment)) +
geom_bar(stat = "count") +
xlab("") +
ggtitle("Number of taxa assessed by country") +
theme_light()
Of which countries and taxonomic groups are the taxa that were assessed more than once?
metadata_firstmulti %>% # we use the _unique dataset so that multiassesed records are counted only once
filter(multiassessment=="multiassessment") %>%
ggplot(aes(x=taxonomic_group, fill=country_assessment)) +
geom_bar(stat = "count") +
theme(axis.text.x = element_text(angle = 45)) +
labs(fill="Country") +
xlab("") +
ggtitle("Number of taxa assessed more than once") +
theme_light()
Countries have population size data (Nc or Ne) regardless of the taxonomic group.
ggplot(metadata, aes(x=taxonomic_group, fill=popsize_data)) +
geom_bar(stat = "count") +
coord_flip() +
facet_wrap(~country_assessment, ncol = 5) +
scale_fill_manual(values=c("#2ca02c", "#1f77b4", "grey80"),
breaks=c("yes", "data_for_species", "insuff_data_species"),
labels=c("Population level", "Species or subspecies level", "Insufficient data")) +
labs(fill="Population size data availability",
x="",
y="Number of taxa (including records of taxa assessed more than once)") +
theme_light() +
theme(panel.border = element_blank(), legend.position="top")
Same plot but including a panel for the entire dataset:
## Duplicate data with an additional column "facet"
df<-CreateAllFacet(metadata, "country_assessment")
# order with "all" as last
df$facet <- factor(df$facet, levels=c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US", "all"))
# Plot
ggplot(df, aes(x=taxonomic_group, fill=popsize_data)) +
geom_bar(stat = "count") +
coord_flip() +
facet_wrap(~facet, ncol = 5, scales="free_x") +
scale_fill_manual(values=c("#2ca02c", "#1f77b4", "grey80"),
breaks=c("yes", "data_for_species", "insuff_data_species"),
labels=c("Population level", "Species or subspecies level", "Insufficient data")) + labs(fill="Population size data availability",
x="",
y="Number of taxa (including records of taxa assessed more than once)") +
theme_light() +
theme(panel.border = element_blank(), legend.position="top")
Population size data availability in the entire dataset:
ggplot(metadata, aes(x=taxonomic_group, fill=popsize_data)) +
geom_bar(stat = "count") +
coord_flip() +
scale_fill_manual(values=c("#1f77b4", "grey80", "#2ca02c"),
breaks=c(levels(as.factor(metadata$popsize_data))),
labels=c("Species level or subspecies level", "Insufficient data", "Population level")) +
labs(fill="Population size data availability",
x="",
y="Number of taxa (including records of taxa assessed more than once)") +
theme_light() +
theme(legend.position="right")
Ne available by taxa?
p1<- metadata %>%
filter(!is.na(ne_pops_exists)) %>%
filter(ne_pops_exists!="other_genetic_info") %>%
ggplot(aes(x=country_assessment, fill=ne_pops_exists)) +
geom_bar() +
scale_fill_manual(labels=c("no", "yes"),
breaks=c("no_genetic_data", "ne_available"),
values=c("#ff7f0e", "#2ca02c")) +
xlab("") +
ylab("Number of taxa") +
labs(fill="Ne available \n(from genetic data)") +
theme_light() +
theme(text = element_text(size = 14), legend.position = "right", panel.border = element_blank())
p1
Nc data available by taxa?
p2<-metadata %>%
filter(!is.na(nc_pops_exists)) %>%
ggplot(aes(x=country_assessment, fill=nc_pops_exists)) +
geom_bar() +
scale_fill_manual(values=c("#ff7f0e", "#2ca02c")) +
labs(fill="Nc available") +
xlab("") +
ylab("Number of taxa") +
theme_light() +
theme(text = element_text(size = 14), legend.position = "right", panel.border = element_blank())
p2
What kind of Nc data?
p3<-ind1_data %>%
filter(!is.na(NcType)) %>%
ggplot(aes(x=country_assessment, fill=NcType))+
geom_bar(position = "dodge") +
scale_fill_manual(labels=c("Point", "Range \nor qualitative", "Unknown"),
breaks=c("Nc_point", "Nc_range", "unknown"),
values=c("#0072B2", "#E69F00", "grey80")) +
xlab("") +
ylab("Number of populations") +
labs(fill="Type of Nc data \nby population") +
theme_light() +
theme(text = element_text(size = 14), legend.position = "right", panel.border = element_blank())
p3
Distribution of Nc, Ne and types of Ne in a single figure with 3 panels:
# plot
plot_grid(p1, p2, p3, ncol=1, rel_heights = c(1,1,1.3), rel_widths = c(1,1,1.3), align = "v", labels=c("a)", "b)", "c)"), vjust = .7)
Same than above, but using percentages instead of count data
Ne available by taxa?
p1.1<- metadata %>%
filter(!is.na(ne_pops_exists)) %>%
filter(ne_pops_exists!="other_genetic_info") %>%
ggplot(aes(x=country_assessment, fill=ne_pops_exists)) +
geom_bar(position="fill") +
scale_fill_manual(labels=c("no", "yes"),
breaks=c("no_genetic_data", "ne_available"),
values=c("#ff7f0e", "#2ca02c")) +
xlab("") +
ylab("Proportion of taxa") +
labs(fill="Ne available \n(from genetic data)") +
theme_light() +
theme(text = element_text(size = 14), legend.position = "right", panel.border = element_blank())
p1.1
Nc available?
p2.1<-metadata %>%
filter(!is.na(nc_pops_exists)) %>%
ggplot(aes(x=country_assessment, fill=nc_pops_exists)) +
geom_bar(position = "fill") +
scale_fill_manual(values=c("#ff7f0e", "#2ca02c")) +
labs(fill="Nc available") +
xlab("") +
ylab("Proportion of taxa") +
theme_light() +
theme(text = element_text(size = 14), legend.position = "right", panel.border = element_blank())
p2.1
What kind of Nc data?
p3.1<-ind1_data %>%
filter(!is.na(NcType)) %>%
ggplot(aes(x=country_assessment, fill=NcType))+
geom_bar(position = "fill") +
scale_fill_manual(labels=c("Point", "Range \nor qualitative", "Unknown"),
breaks=c("Nc_point", "Nc_range", "unknown"),
values=c("#0072B2", "#E69F00", "grey80")) +
xlab("") +
ylab("Proportion of populations") +
labs(fill="Type of Nc data \nby population") +
theme_light() +
theme(text = element_text(size = 14), legend.position = "right", panel.border = element_blank())
p3.1
Distribution of Nc, Ne and types of Ne in a single figure with 3 panels and proportions instead of count:
# plot
plot_grid(p1.1, p2.1, p3.1, ncol=1, rel_widths = c(1,1,1), align = "v", labels=c("a)", "b)", "c)"), vjust = .7)
Distribution of Nc, Ne and types of Ne in a single figure with 3 panels, using count for a & b, and proportions for c:
# plot
plot_grid(p1, p2, p3.1, ncol=1, rel_widths = c(1,1,1), align = "v", labels=c("a)", "b)", "c)"), vjust = .7)
Reformat data
select(metadata, defined_populations_nicenames, defined_populations_simplified)
# reformat data
foralluvial<-metadata %>% group_by(country_assessment, defined_populations_nicenames, taxonomic_group) %>%
summarise(n=n())
## `summarise()` has grouped output by 'country_assessment',
## 'defined_populations_nicenames'. You can override using the `.groups` argument.
# define colors
my_cols<- simplifiedmethods_colors
# we need a vector of colors by country for each row of the dataset, so:
methodspop<-as.factor(foralluvial$defined_populations_nicenames)
levels(methodspop)<-my_cols
methodspop<-as.vector(methodspop)
head(methodspop)
## [1] "#b34656" "#668cd1" "#668cd1" "#668cd1" "#668cd1" "#668cd1"
Plot
# plot
alluvial(foralluvial[,1:3], freq = foralluvial$n,
col=methodspop,
blocks=FALSE,
gap.width = 0.5,
cex=.8,
xw = 0.1,
cw = 0.2,
border = NA,
alpha = .7)
Plot Number of mantained populations by country and method
ind2_data %>%
filter(n_extant_populations<500) %>% # filter outliers
# order countries vertically by similar number of pops
mutate(country_assessment = factor(country_assessment,
levels=c("Colombia", "Australia", "Belgium",
"Mexico", "France", "US",
"S. Africa", "Japan", "Sweden"))) %>%
ggplot(aes(x=defined_populations_nicenames, y=n_extant_populations,
fill=defined_populations_nicenames, color=defined_populations_nicenames)) +
geom_boxplot() +
geom_jitter(size=.3, width = 0.1, color="black") +
coord_flip() +
facet_wrap(country_assessment ~ ., nrow=3, scales="free_x") +
xlab("") +
ylab("Number of mantained populations") +
scale_fill_manual(values=alpha(simplifiedmethods_colors, .3),
breaks=levels(as.factor(ind2_data$defined_populations_nicenames))) +
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(ind2_data$defined_populations_nicenames))) +
scale_x_discrete(limits=rev) +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
text = element_text(size = 15))
levels(ind2_data$country_assessment)
## [1] "Australia" "Belgium" "Colombia" "France" "Japan" "Mexico"
## [7] "S. Africa" "Sweden" "US"
Plot number of populations by method
# Prepare data for plot with nice labels:
# sample size of TOTAL populations
sample_size <- ind2_data %>%
filter(!is.na(indicator2)) %>%
group_by(defined_populations_nicenames) %>% summarize(num=n())
# custom axis
## new dataframe
df<-ind2_data %>%
filter(n_extant_populations<500) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(defined_populations_nicenames, " (n= ", num, ")")) %>%
#myaxis needs levels in the same order than defined_populations_nicenames
mutate(myaxis = factor(myaxis,
levels=levels(as.factor(myaxis))[c(1,11,2:10,12)])) # reorders levels
## Joining, by = "defined_populations_nicenames"
# plot for number of pops
p1<- df %>%
ggplot(aes(x=myaxis, y=n_extant_populations, color=defined_populations_nicenames,
fill=defined_populations_nicenames)) +
geom_boxplot() + xlab("") + ylab("Number of mantained populations") +
geom_jitter(size=.4, width = 0.1, color="black") +
coord_flip() +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
plot.margin = unit(c(0, 0, 0, 0), "cm")) + # this is used to decrease the space between plots
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(ind2_data$defined_populations_nicenames))) +
scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
breaks=levels(as.factor(ind2_data$defined_populations_nicenames))) +
scale_x_discrete(limits=rev) +
theme(text = element_text(size = 13))
p1
Prepare data for ANOVA:
# subset data without massive outlier
ind2_data_anova<- ind2_data %>%
filter(n_extant_populations<1000)
# summary of n per variable
table(ind2_data_anova$defined_populations_simplified)
##
## adaptive_traits management_units
## 19
## eco_biogeo_proxies
## 41
## genetic_clusters
## 105
## genetic_clusters eco_biogeo_proxies
## 19
## genetic_clusters geographic_boundaries
## 74
## geographic_boundaries
## 272
## geographic_boundaries adaptive_traits
## 32
## geographic_boundaries eco_biogeo_proxies
## 74
## geographic_boundaries management_units
## 25
## low_freq_combinations
## 101
## management_units
## 127
## other
## 13
One way ANOVA on the effect of the methods to define populations on the number of populations
# One way ANOVA method
res.anova.extant<-aov(n_extant_populations ~ defined_populations_simplified, data=ind2_data_anova)
res.anova.extant
## Call:
## aov(formula = n_extant_populations ~ defined_populations_simplified,
## data = ind2_data_anova)
##
## Terms:
## defined_populations_simplified Residuals
## Sum of Squares 34724.1 1798790.6
## Deg. of Freedom 11 890
##
## Residual standard error: 44.95679
## Estimated effects may be unbalanced
Summary:
summary(res.anova.extant)
## Df Sum Sq Mean Sq F value Pr(>F)
## defined_populations_simplified 11 34724 3157 1.562 0.105
## Residuals 890 1798791 2021
Check for pairs that are statistically different (Tukey Honest Significant Differences, p < 0.05):
# only padj < 0.05
x<-TukeyHSD(res.anova.extant)
x<-as.data.frame(x[[1]])
x[x$`p adj`<0.05, ]
Plot
# Prepare data for plot with nice labels:
# sample size of TOTAL populations
sample_size <- ind2_data %>%
filter(!is.na(indicator2)) %>%
group_by(defined_populations_nicenames) %>% summarize(num=n())
# custom axis
## new dataframe
df<-ind2_data %>%
filter(n_extant_populations<500) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(defined_populations_nicenames, " (n= ", num, ")")) %>%
#myaxis needs levels in the same order than defined_populations_nicenames
mutate(myaxis = factor(myaxis,
levels=levels(as.factor(myaxis))[c(1,11,2:10,12)])) # reorders levels
## Joining, by = "defined_populations_nicenames"
## plot for indicator 2
p2<- df %>%
filter(n_extant_populations<500) %>%
ggplot(aes(x=myaxis, y=indicator2, color=defined_populations_simplified,
fill=defined_populations_simplified)) +
geom_boxplot() + xlab("") + ylab("Indicator 2") +
geom_jitter(size=.4, width = 0.1, color="black") +
coord_flip() +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
plot.margin = unit(c(0, 0, 0, 0), "cm")) + # this is used to decrease the space between plots)
scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
breaks=levels(as.factor(ind2_data$defined_populations_simplified))) +
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(ind2_data$defined_populations_simplified))) +
scale_x_discrete(limits=rev) +
theme(text = element_text(size = 13))
p2
## Warning: Removed 338 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 338 rows containing missing values (`geom_point()`).
Prepare data
# subset data without massive outlier
ind2_data_anova<- ind2_data %>%
filter(indicator2<1000)
# summary of n per variable
table(ind2_data_anova$defined_populations_simplified)
##
## adaptive_traits management_units
## 19
## eco_biogeo_proxies
## 30
## genetic_clusters
## 50
## genetic_clusters eco_biogeo_proxies
## 12
## genetic_clusters geographic_boundaries
## 46
## geographic_boundaries
## 180
## geographic_boundaries adaptive_traits
## 32
## geographic_boundaries eco_biogeo_proxies
## 56
## geographic_boundaries management_units
## 17
## low_freq_combinations
## 69
## management_units
## 45
## other
## 9
One way ANOVA
# One way ANOVA method
res.anova.indicator2<-aov(indicator2 ~ defined_populations_simplified, data=ind2_data_anova)
summary(res.anova.indicator2)
## Df Sum Sq Mean Sq F value Pr(>F)
## defined_populations_simplified 11 3.349 0.30448 5.42 3.22e-08 ***
## Residuals 553 31.065 0.05618
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Summary:
summary(res.anova.indicator2)
## Df Sum Sq Mean Sq F value Pr(>F)
## defined_populations_simplified 11 3.349 0.30448 5.42 3.22e-08 ***
## Residuals 553 31.065 0.05618
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Check for pairs that are statistically different (Tukey Honest Significant Differences, p < 0.05):
# only padj < 0.05
x<-TukeyHSD(res.anova.indicator2)
x<-as.data.frame(x[[1]])
x[x$`p adj`<0.05, ]
Scatter plot of indicator2 and extant pops
p3<- ind2_data %>%
# filter outliers with too many pops
filter(n_extant_populations<500) %>%
# plot
ggplot(aes(x=n_extant_populations, y=indicator2, color=defined_populations_nicenames)) +
geom_point() +
theme_light() +
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(ind2_data$defined_populations_nicenames))) +
theme(legend.position = "none") +
ylab("Indicator 2") +
xlab("Number of mantained populations") +
theme(text = element_text(size = 13))
p3
## Warning: Removed 338 rows containing missing values (`geom_point()`).
Prepare data
# remove missing data
ind2_data_wo_missing<-ind2_data %>%
filter(!is.na(indicator2)) %>%
filter(n_extant_populations<500) # doesn't make a difference in the test below, but useful for plots
# check n per method
table(ind2_data_wo_missing$defined_populations_simplified)
##
## adaptive_traits management_units
## 19
## eco_biogeo_proxies
## 30
## genetic_clusters
## 50
## genetic_clusters eco_biogeo_proxies
## 12
## genetic_clusters geographic_boundaries
## 46
## geographic_boundaries
## 178
## geographic_boundaries adaptive_traits
## 32
## geographic_boundaries eco_biogeo_proxies
## 56
## geographic_boundaries management_units
## 17
## low_freq_combinations
## 69
## management_units
## 45
## other
## 9
Run GLM model
# run model
m2 <- glm(ind2_data_wo_missing$indicator2 ~ ind2_data_wo_missing$n_extant_populations +
ind2_data_wo_missing$defined_populations_simplified +
ind2_data_wo_missing$n_extant_populations*ind2_data_wo_missing$defined_populations_simplified, family = "quasibinomial")
m2
##
## Call: glm(formula = ind2_data_wo_missing$indicator2 ~ ind2_data_wo_missing$n_extant_populations +
## ind2_data_wo_missing$defined_populations_simplified + ind2_data_wo_missing$n_extant_populations *
## ind2_data_wo_missing$defined_populations_simplified, family = "quasibinomial")
##
## Coefficients:
## (Intercept)
## 2.40748
## ind2_data_wo_missing$n_extant_populations
## 0.04605
## ind2_data_wo_missing$defined_populations_simplifiedeco_biogeo_proxies
## -0.90120
## ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters
## 0.48522
## ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies
## 1.73254
## ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters geographic_boundaries
## -0.27973
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries
## -1.00348
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries adaptive_traits
## 0.12491
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## -1.08795
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries management_units
## -0.65696
## ind2_data_wo_missing$defined_populations_simplifiedlow_freq_combinations
## -0.70210
## ind2_data_wo_missing$defined_populations_simplifiedmanagement_units
## -1.81268
## ind2_data_wo_missing$defined_populations_simplifiedother
## -2.49424
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedeco_biogeo_proxies
## -0.04288
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters
## -0.12867
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies
## -0.18537
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters geographic_boundaries
## -0.05244
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries
## -0.04408
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries adaptive_traits
## -0.03108
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## -0.05005
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries management_units
## 0.05238
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedlow_freq_combinations
## -0.04496
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedmanagement_units
## -0.05417
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedother
## 0.66367
##
## Degrees of Freedom: 562 Total (i.e. Null); 539 Residual
## Null Deviance: 246.2
## Residual Deviance: 214.7 AIC: NA
Summary:
summary(m2)
##
## Call:
## glm(formula = ind2_data_wo_missing$indicator2 ~ ind2_data_wo_missing$n_extant_populations +
## ind2_data_wo_missing$defined_populations_simplified + ind2_data_wo_missing$n_extant_populations *
## ind2_data_wo_missing$defined_populations_simplified, family = "quasibinomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8020 -0.3222 0.3601 0.5988 0.9748
##
## Coefficients:
## Estimate
## (Intercept) 2.40748
## ind2_data_wo_missing$n_extant_populations 0.04605
## ind2_data_wo_missing$defined_populations_simplifiedeco_biogeo_proxies -0.90120
## ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters 0.48522
## ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 1.73254
## ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters geographic_boundaries -0.27973
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries -1.00348
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries adaptive_traits 0.12491
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -1.08795
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries management_units -0.65696
## ind2_data_wo_missing$defined_populations_simplifiedlow_freq_combinations -0.70210
## ind2_data_wo_missing$defined_populations_simplifiedmanagement_units -1.81268
## ind2_data_wo_missing$defined_populations_simplifiedother -2.49424
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedeco_biogeo_proxies -0.04288
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters -0.12867
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies -0.18537
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters geographic_boundaries -0.05244
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries -0.04408
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries adaptive_traits -0.03108
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.05005
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries management_units 0.05238
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedlow_freq_combinations -0.04496
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedmanagement_units -0.05417
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedother 0.66367
## Std. Error
## (Intercept) 0.78714
## ind2_data_wo_missing$n_extant_populations 0.12562
## ind2_data_wo_missing$defined_populations_simplifiedeco_biogeo_proxies 0.85777
## ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters 0.93775
## ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 1.52683
## ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.84635
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries 0.79812
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries adaptive_traits 0.92914
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.81582
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries management_units 1.01984
## ind2_data_wo_missing$defined_populations_simplifiedlow_freq_combinations 0.82259
## ind2_data_wo_missing$defined_populations_simplifiedmanagement_units 0.82101
## ind2_data_wo_missing$defined_populations_simplifiedother 1.22073
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedeco_biogeo_proxies 0.12576
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters 0.16613
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.13815
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.12566
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries 0.12567
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries adaptive_traits 0.13199
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.12567
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries management_units 0.20612
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedlow_freq_combinations 0.12620
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedmanagement_units 0.12677
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedother 0.58195
## t value
## (Intercept) 3.059
## ind2_data_wo_missing$n_extant_populations 0.367
## ind2_data_wo_missing$defined_populations_simplifiedeco_biogeo_proxies -1.051
## ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters 0.517
## ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 1.135
## ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters geographic_boundaries -0.331
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries -1.257
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries adaptive_traits 0.134
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -1.334
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries management_units -0.644
## ind2_data_wo_missing$defined_populations_simplifiedlow_freq_combinations -0.854
## ind2_data_wo_missing$defined_populations_simplifiedmanagement_units -2.208
## ind2_data_wo_missing$defined_populations_simplifiedother -2.043
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedeco_biogeo_proxies -0.341
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters -0.775
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies -1.342
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters geographic_boundaries -0.417
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries -0.351
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries adaptive_traits -0.235
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.398
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries management_units 0.254
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedlow_freq_combinations -0.356
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedmanagement_units -0.427
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedother 1.140
## Pr(>|t|)
## (Intercept) 0.00233
## ind2_data_wo_missing$n_extant_populations 0.71407
## ind2_data_wo_missing$defined_populations_simplifiedeco_biogeo_proxies 0.29390
## ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters 0.60507
## ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.25699
## ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.74114
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries 0.20919
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries adaptive_traits 0.89311
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.18291
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries management_units 0.51973
## ind2_data_wo_missing$defined_populations_simplifiedlow_freq_combinations 0.39374
## ind2_data_wo_missing$defined_populations_simplifiedmanagement_units 0.02767
## ind2_data_wo_missing$defined_populations_simplifiedother 0.04151
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedeco_biogeo_proxies 0.73325
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters 0.43896
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.18024
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.67658
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries 0.72594
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries adaptive_traits 0.81395
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.69058
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries management_units 0.79949
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedlow_freq_combinations 0.72178
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedmanagement_units 0.66933
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedother 0.25461
##
## (Intercept) **
## ind2_data_wo_missing$n_extant_populations
## ind2_data_wo_missing$defined_populations_simplifiedeco_biogeo_proxies
## ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters
## ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies
## ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters geographic_boundaries
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries adaptive_traits
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries management_units
## ind2_data_wo_missing$defined_populations_simplifiedlow_freq_combinations
## ind2_data_wo_missing$defined_populations_simplifiedmanagement_units *
## ind2_data_wo_missing$defined_populations_simplifiedother *
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedeco_biogeo_proxies
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgenetic_clusters geographic_boundaries
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries adaptive_traits
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedgeographic_boundaries management_units
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedlow_freq_combinations
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedmanagement_units
## ind2_data_wo_missing$n_extant_populations:ind2_data_wo_missing$defined_populations_simplifiedother
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasibinomial family taken to be 0.3995451)
##
## Null deviance: 246.17 on 562 degrees of freedom
## Residual deviance: 214.70 on 539 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 7
Plot in three panels
## plot for indicator 2 without axis labels
p2<-ind2_data %>%
filter(n_extant_populations<500) %>%
ggplot(aes(x=defined_populations_simplified, y=indicator2, color=defined_populations_simplified)) +
geom_boxplot() + xlab("") + ylab("Indicator 2") +
geom_jitter(size=.4, width = 0.1, color="black") +
coord_flip() +
theme_light() +
theme(panel.border = element_blank(), legend.position="none", axis.text.y = element_blank(),
plot.margin = unit(c(0, 0, 0, 0), "cm")) + # this is used to decrease the space between plots) +
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(ind2_data$defined_populations_simplified))) +
theme(text = element_text(size = 13))
p2
## Warning: Removed 338 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 338 rows containing missing values (`geom_point()`).
plot_grid(p1, p2, p3, ncol=3, rel_widths = c(2,1,1), align = "h", labels=c("a)", "b)", "c)"))
## Warning: Removed 338 rows containing non-finite values (`stat_boxplot()`).
## Removed 338 rows containing missing values (`geom_point()`).
## Removed 338 rows containing missing values (`geom_point()`).
Option 2:
# panel labels
p1<-p1+ggtitle("a)")
p2<-p2+ggtitle("b)")
p3<-p3+ggtitle("c)")
# nested plot!
panel_figure <- plot_grid(
plot_grid(p1, p2, ncol = 2, rel_widths = c(1.5,1)), # Arranging p1 and p2 in 2 columns
p3, # Placing p3 centered below p1 and p2
ncol = 1, # Single column layout
rel_heights = c(2, 1) # Adjust the relative heights of rows
)
## Warning: Removed 338 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 338 rows containing missing values (`geom_point()`).
## Removed 338 rows containing missing values (`geom_point()`).
panel_figure
We have NA in indicator 2 because in some cases the number of extinct populations is unknown, therefore the operation cannot be computed.
Total records with NA in extant populations:
sum(is.na(ind2_data$n_extant_populations))
## [1] 21
Taxa with NA in extant populations:
ind2_data %>%
filter(is.na(n_extant_populations)) %>%
select(country_assessment, taxonomic_group, taxon, n_extant_populations, n_extint_populations)
Total taxa with NA in extinct populations:
sum(is.na(ind2_data$n_extint_populations))
## [1] 359
Do taxa with NA for extant also have NA for extinct?
ind2_data$taxon[is.na(ind2_data$n_extant_populations)] %in% ind2_data$taxon[is.na(ind2_data$n_extint_populations)]
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE
So out of the 924, we have 359 records with NA in n_extinct and 21 records with NA in n_extant. Of them, 21 have NA in both n_extant and n_extinct.
ind2_data %>%
ggplot(aes(x=country_assessment, fill=is.na(n_extint_populations))) +
geom_bar() +
scale_fill_manual(labels=c("number of populations known", "missing data"),
values=c("#2ca02c", "#ff7f0e")) +
labs(fill="Extinct populations") +
xlab("") + ylab("Number of taxa") +
theme_light() +
theme(panel.border = element_blank())
Missing data in number of extinct populations by method to define populations:
ind2_data %>%
ggplot(aes(x=defined_populations_nicenames, fill=is.na(n_extint_populations)))+
geom_bar() +
coord_flip()+
scale_fill_manual(labels=c("number of populations known", "missing data"),
values=c("#2ca02c", "#ff7f0e")) +
labs(fill="Extinct populations") +
xlab("") + ylab("Number of taxa") +
facet_wrap(country_assessment ~., nrow = 3, scales="free_x") +
theme_light() +
theme(panel.border = element_blank(), legend.position="top")
## Indicator 1 by type of range
Indicator 1 by type of range in the entire dataset:
# get sample size by desired category
sample_size <- indicator1 %>%
filter(!is.na(indicator1)) %>%
group_by(species_range) %>% summarize(num=n())
# plot
indicator1 %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(species_range, " (n= ", num, ")")) %>%
# plot
ggplot(aes(x=myaxis, y=indicator1 , fill=species_range)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of popuations with Ne > 500") +
coord_flip() +
scale_fill_manual(breaks=c("wide_ranging", "restricted", "unknown"),
labels=c("wide ranging", "restricted", "unknown"),
values=c("#F8766D", "#00BFC4", "grey80")) +
theme_light() +
theme(panel.border = element_blank(), legend.position="none", text= element_text(size=20))
## Joining, by = "species_range"
## Warning: Removed 21 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 21 rows containing missing values (`geom_point()`).
Indicator 1 by country and type of range
### Duplicate dataframe to have a column with "all data" for faceting
df<-CreateAllFacet(indicator1 , "country_assessment")
# order with "all" as last
df$facet <- factor(df$facet, levels=c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US", "all"))
## plot
df %>%
# filter out "unknown" range
filter(species_range!="unknown") %>%
# plot
ggplot(aes(x=species_range, y=indicator1 , fill=species_range)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of popuations with Ne > 500") +
coord_flip() +
scale_x_discrete(breaks=c("wide_ranging", "restricted"),
labels=c("wide ranging", "restricted")) +
theme_light() +
theme(panel.border = element_blank(), legend.position="none", text= element_text(size=15)) +
facet_wrap(~facet, ncol = 5) +
theme(panel.spacing = unit(1.5, "lines"))
## Warning: Removed 38 rows containing non-finite values (`stat_ydensity()`).
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Removed 38 rows containing missing values (`geom_point()`).
Test the effect of range on indicator1 with country as a random
effect.
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Remove unknown
data<-indicator1 %>%
filter(species_range!="unknown")
## run model with glm
m1 <- glm(data$indicator1 ~ data$species_range + data$country_assessment, family = "quasibinomial")
summary(m1)
##
## Call:
## glm(formula = data$indicator1 ~ data$species_range + data$country_assessment,
## family = "quasibinomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4645 -0.7852 -0.4774 0.6668 2.1734
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.2628 0.3683 -6.143 1.65e-09 ***
## data$species_rangewide_ranging 1.4408 0.2055 7.012 7.64e-12 ***
## data$country_assessmentBelgium 0.1485 0.4117 0.361 0.718474
## data$country_assessmentColombia 2.0978 0.5347 3.923 9.95e-05 ***
## data$country_assessmentFrance 1.2441 0.4297 2.895 0.003954 **
## data$country_assessmentJapan -0.8043 0.5883 -1.367 0.172194
## data$country_assessmentMexico 0.3593 0.4711 0.763 0.446102
## data$country_assessmentS. Africa 1.1134 0.4212 2.644 0.008459 **
## data$country_assessmentSweden 0.2644 0.4261 0.620 0.535282
## data$country_assessmentUS 1.4754 0.4152 3.554 0.000416 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasibinomial family taken to be 0.7431228)
##
## Null deviance: 485.73 on 511 degrees of freedom
## Residual deviance: 411.72 on 502 degrees of freedom
## (19 observations deleted due to missingness)
## AIC: NA
##
## Number of Fisher Scoring iterations: 5
# or with lme4?
m2 <-lmer(indicator1 ~ species_range + (1|country_assessment), data = data)
summary(m2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: indicator1 ~ species_range + (1 | country_assessment)
## Data: data
##
## REML criterion at convergence: 462.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.5490 -0.7841 -0.3282 0.7966 2.4470
##
## Random effects:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.01771 0.1331
## Residual 0.13742 0.3707
## Number of obs: 512, groups: country_assessment, 9
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.19357 0.04959 3.903
## species_rangewide_ranging 0.25798 0.03561 7.245
##
## Correlation of Fixed Effects:
## (Intr)
## spcs_rngwd_ -0.273
Indicator 1 by global IUCN in the entire dataset:
## Global IUCN
## prepare data
# add sampling size
sample_size <- indicator1 %>%
filter(!is.na(indicator1)) %>%
group_by(global_IUCN) %>% summarize(num=n())
# new df
df<- indicator1 %>%
filter(!is.na(indicator1)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(global_IUCN, " (n= ", num, ")"))
## Joining, by = "global_IUCN"
# change order of levels so that they are in the desired order
df$myaxis<-factor(df$myaxis,
#grep is used below to get the sample size, which may change depending on the data
levels=c(grep("cr", unique(df$myaxis), value = TRUE),
grep("en", unique(df$myaxis), value = TRUE),
grep("vu", unique(df$myaxis), value = TRUE),
grep("nt", unique(df$myaxis), value = TRUE),
grep("lc", unique(df$myaxis), value = TRUE),
grep("dd", unique(df$myaxis), value = TRUE),
grep("not_assessed", unique(df$myaxis), value = TRUE),
grep("unknown", unique(df$myaxis), value = TRUE)))
df$global_IUCN<-factor(df$global_IUCN, levels=c("cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))
# plot
p1<-df %>%
ggplot(aes(x=myaxis, y=indicator1 , fill=global_IUCN)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of popuations with Ne > 500") +
coord_flip() +
scale_fill_manual(values= IUCNcolors, # iucn color codes
breaks=c(levels(df$global_IUCN))) +
theme_light() +
ggtitle("a) global Red List") +
theme(panel.border = element_blank(), legend.position="none", text= element_text(size=15))
p1
Indicator 1 by IUCN in the entire dataset:
## Regional IUCN
## prepare data
# add sampling size
sample_size <- indicator1 %>%
filter(!is.na(indicator1)) %>%
group_by(regional_redlist) %>% summarize(num=n())
# new df
df<- indicator1 %>%
filter(!is.na(indicator1)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(regional_redlist, " (n= ", num, ")"))
## Joining, by = "regional_redlist"
# change order of levels so that they are in the desired order
df$myaxis<-factor(df$myaxis,
#grep is used below to get the sample size, which may change depending on the data
levels=c(grep("re", unique(df$myaxis), value = TRUE),
grep("cr", unique(df$myaxis), value = TRUE),
grep("en", unique(df$myaxis), value = TRUE),
grep("vu", unique(df$myaxis), value = TRUE),
grep("nt", unique(df$myaxis), value = TRUE),
grep("lc", unique(df$myaxis), value = TRUE),
grep("dd", unique(df$myaxis), value = TRUE),
grep("not_assessed", unique(df$myaxis), value = TRUE),
grep("unknown", unique(df$myaxis), value = TRUE)))
df$regional_redlist<-factor(df$regional_redlist, levels=c("re","cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))
# plot
p2<-df %>%
ggplot(aes(x=myaxis, y=indicator1 , fill=regional_redlist)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of popuations with Ne > 500") +
coord_flip() +
scale_fill_manual(values= IUCNcolors_regional, # iucn color codes
breaks=c(levels(df$regional_redlist))) +
theme_light() +
ggtitle("b) regional Red List") +
theme(panel.border = element_blank(), legend.position="none", text= element_text(size=15))
p2
Both together
plot_grid(p1, p2, ncol=2)
Indicator 1 by country and global IUCN
## change order of levels so that categories match with the order of colors
indicator1$global_IUCN<-factor(indicator1$global_IUCN, levels=c("cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))
# plot
indicator1 %>%
# plot
ggplot(aes(x=global_IUCN, y=indicator1, fill=global_IUCN)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of popuations with Ne > 500") +
coord_flip() +
scale_fill_manual(values= IUCNcolors, # iucn color codes
breaks=c(levels(indicator1$global_IUCN))) +
theme_light() +
ggtitle("global IUCN Redlist") +
theme(panel.border = element_blank(), legend.position="none", text= element_text(size=13)) +
facet_wrap(~country_assessment, ncol = 3) +
theme(panel.spacing = unit(1.5, "lines"))
## Warning: Removed 21 rows containing non-finite values (`stat_ydensity()`).
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Warning: Removed 21 rows containing missing values (`geom_point()`).
Indicator1 by regional IUCN Redlist, excluding US and Mexico becasue they don’t have a regional IUCN redlist.
## change order of levels so that categories match with the order of colors
indicator1$regional_redlist<-factor(indicator1$regional_redlist, levels=c("re","cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))
# plot
indicator1 %>%
# filter US and Mx
filter(country_assessment!="Mexico", country_assessment!="US") %>%
# plot
ggplot(aes(x=regional_redlist, y=indicator1, fill=regional_redlist)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of popuations with Ne > 500") +
coord_flip() +
scale_fill_manual(values= IUCNcolors_regional, # iucn color codes
breaks=c(levels(indicator1$regional_redlist))) +
theme_light() +
ggtitle("regional IUCN Redlist") +
theme(panel.border = element_blank(), legend.position="none", text= element_text(size=15)) +
facet_wrap(~country_assessment, ncol = 4) +
theme(panel.spacing = unit(1.5, "lines"))
## Warning: Removed 11 rows containing non-finite values (`stat_ydensity()`).
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Warning: Removed 11 rows containing missing values (`geom_point()`).
#subset only with taxa assessed multiple times:
only_multi<-indicators_full %>%
filter(multiassessment=="multiassessment")
First, check how indicator 1 changes across the multiassessments.
only_multi %>%
# filter species with missing data for the desired indicator
filter(indicator1!=is.na(indicator1)) %>%
# Keep rows with different values in indicator1 within each taxon group
group_by(taxon) %>%
filter(n_distinct(indicator1) > 1) %>%
ggplot(aes(x=taxon, y=indicator1)) +
geom_line(colour="darkgrey") +
geom_point(aes(color=country_assessment)) +
xlab("") + ylab("Proportion of popuations with Ne > 500") +
labs(color="country") +
ylim(0, 1)+
coord_flip() +
theme_light() +
theme(panel.border = element_blank(), legend.position="right", text= element_text(size=13))
Now check how indicator 2 changes across the multiassessments. To be able to visualize the missing data, the following plot changes NA to -1.
only_multi %>%
# filter species with missing data for the desired indicator
filter(indicator2!=is.na(indicator2)) %>%
ggplot(aes(x=taxon, y=indicator2)) +
geom_line(colour="darkgrey") +
geom_point(aes(color=country_assessment)) +
xlab("") + ylab("Proportion of populations mantained ") +
labs(color="country") +
coord_flip() +
theme_light() +
theme(panel.border = element_blank(), legend.position="right", text= element_text(size=13))